R Notebook

library(broom)
library(bslib)

Attaching package: ‘bslib’

The following object is masked from ‘package:broom’:

    bootstrap

The following object is masked from ‘package:utils’:

    page
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(geojsonio)
Registered S3 method overwritten by 'geojsonsf':
  method        from   
  print.geojson geojson

Attaching package: ‘geojsonio’

The following object is masked from ‘package:base’:

    pretty
library(ggnewscale)
library(ggplot2)
library(leaflet)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
library(osmdata)
Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(RColorBrewer)
library(RSocrata)
library(rstudioapi)
library(sf)
Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(shiny)

Attaching package: ‘shiny’

The following object is masked from ‘package:geojsonio’:

    validate
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ────────────────────────────────── tidyverse 1.3.2 ──✔ tibble  3.1.8     ✔ purrr   0.3.4
✔ tidyr   1.2.1     ✔ stringr 1.4.0
✔ readr   2.1.2     ✔ forcats 0.5.2── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
✖ bslib::bootstrap() masks broom::bootstrap()
✖ dplyr::filter()    masks stats::filter()
✖ dplyr::lag()       masks stats::lag()
library(wesanderson)

# Set working directory
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
getwd()
[1] "/Users/xxxxoxygene/Downloads/Columbia University/Fall2022/STAT4243/Project 2/fall2022-project2-group2/doc"
# Load the restaurant dataset
data <- read.socrata(
  "https://data.cityofnewyork.us/resource/43nn-pn8j.json",
  app_token = "zTRehp1897SQtpYtBiIOUMfR4"
)

# Extract years
data$year <- format(data$inspection_date,"%Y")

# Filter the dataset
df <- data %>%
  filter(data$year >= 2019 & zipcode != "" & dba != "") %>%
  mutate(grade = replace(grade, grade == "", NA))

head(df)
# Read the geojson file containing spatial info
spdf_file <- geojson_read("../data/zip_code_040114.geojson", what = "sp")

stats_df <- spdf_file@data

# Convert it to a spatial data frame, with zip code as index
spdf_data <- tidy(spdf_file,
  region = "ZIPCODE"  # Use ZIPCODE variable as index, the index will be named "id"
)

Section I: Filtered plots

# Number of actions over time
df %>%
  group_by(year, action) %>%
  summarize(num_violations = n()) %>%
  ggplot(aes(x = year, y = num_violations, fill = action)) +
  geom_bar(stat = "identity") +
  labs(title = "Number of Actions over Time", x = "Year", y = "Count", fill = "Action") +
  scale_fill_manual(values = wes_palettes$Moonrise3[c(3, 4, 5, 1, 2)]) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 5, byrow = TRUE))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

  
# Number of violations by critical level over time
df %>%
  filter(critical_flag != "Not Applicable") %>%
  group_by(year, critical_flag) %>%
  summarize(num_violations = n()) %>%
  ggplot(aes(x = year, y = num_violations, fill = critical_flag)) +
  geom_bar(stat = "identity") +
  labs(title = "Number of Violations by Critical Level over Time", x = "Year", y = "Count", fill = "Critical Level") +
  theme_minimal() +
  theme(legend.position = "bottom")
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

# Top 5 violations
df %>%
  group_by(year, violation_code) %>%
  summarize(num_violations = n()) %>%
  ungroup() %>%
  group_by(year) %>%
  slice_max(num_violations, n = 5) %>%
  ggplot(aes(x = year, y = num_violations, fill = violation_code)) +
  geom_bar(stat = "identity") +
  labs(title = "Top 5 Violations over Time", x = "Year", y = "Count", fill = "Violation Code") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(legend.position = "bottom")
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

Section II: Interactive plots

# Non-interactive map (population by region)
ggplot() +
  geom_polygon(data = spdf_data %>% left_join(stats_df, c("id" = "ZIPCODE")),
               aes(x = long, y = lat, group = group, fill = POPULATION),
               color = "white", size = .2) +
  theme_void() +
  coord_map() +
  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
  labs(title = "Population in New York City",
       subtitle = "Neighborhoods are filled by population",
       fill = "Population")

# Number of restaurants per zipcode
Num_Rest_Code <- df %>%
  group_by(zipcode, dba, latitude, longitude) %>%
  count() %>%
  group_by(zipcode) %>%
  count()

Critical_2019_by_Code <- df %>%
  filter(year == 2019) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Critical_2020_by_Code <- df %>%
  filter(year == 2020) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Critical_2021_by_Code <- df %>%
  filter(year == 2021) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Critical_2022_by_Code <- df %>%
  filter(year == 2022) %>%
  group_by(zipcode) %>%
  summarize(Total = n())

Critical_spdf_file_2022 <- spdf_file
Critical_spdf_file_2022@data <- Critical_spdf_file_2022@data %>%
  left_join(Critical_2022_by_Code, c("ZIPCODE" = "zipcode"))

Critical_spdf_file_2019 <- spdf_file
Critical_spdf_file_2019@data <- Critical_spdf_file_2019@data %>%
  left_join(Critical_2019_by_Code, c("ZIPCODE" = "zipcode"))

Critical_spdf_file_2020 <- spdf_file
Critical_spdf_file_2020@data <- Critical_spdf_file_2020@data %>%
  left_join(Critical_2020_by_Code, c("ZIPCODE" = "zipcode"))

Critical_spdf_file_2021 <- spdf_file
Critical_spdf_file_2021@data <- Critical_spdf_file_2021@data %>%
  left_join(Critical_2021_by_Code, c("ZIPCODE" = "zipcode"))

critical_violations <- list(Critical_spdf_file_2019, Critical_spdf_file_2020, Critical_spdf_file_2021, Critical_spdf_file_2022)
names(critical_violations) <- c("2019", "2020", "2021", "2022")

nc_pal <- colorNumeric(palette = "YlOrBr", domain = Critical_spdf_file_2019@data$Total, na.color = 'transparent')

leaflet() %>%
  addProviderTiles("CartoDB") %>%
  # First layer of polygons
  addPolygons(
    data = Critical_spdf_file_2022,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0('Total Critical Violation : ', Total),
    group = '2022',
    highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
  ) %>%
  # Second layer of polygons
  addPolygons(
    data = Critical_spdf_file_2021,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Critical Violation : ', Total),
    group = '2021',
    highlight = highlightOptions(weight = 3, color = "red", bringToFront =  TRUE)
  ) %>%
  addLayersControl(overlayGroups = c("2022", "2021")) %>%
  addLegend(pal = nc_pal, values = Critical_spdf_file_2022$Total, opacity = 0.9, title = "Critical", position = "bottomleft")
Warning: Some values were outside the color scale and will be treated as NAWarning: Some values were outside the color scale and will be treated as NA
Total_violation <- df %>%
  group_by(zipcode) %>%
  summarize(Total = n())

Total_2019_by_Code <- df %>%
  filter(year == 2019) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Total_2020_by_Code <- df %>%
  filter(year == 2020) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Total_2021_by_Code <- df %>%
  filter(year == 2021) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Total_2022_by_Code <- df %>%
  filter(year == 2022) %>%
  group_by(zipcode) %>%
  summarize(Total = n())

Total_spdf_file_2022 <- spdf_file
Total_spdf_file_2022@data <- Total_spdf_file_2022@data %>%
  left_join(Total_2022_by_Code, c("ZIPCODE" = "zipcode"))

Total_spdf_file_2019 <- spdf_file
Total_spdf_file_2019@data <- Total_spdf_file_2019@data %>%
  left_join(Total_2019_by_Code, c("ZIPCODE" = "zipcode"))

Total_spdf_file_2020 <- spdf_file
Total_spdf_file_2020@data <- Total_spdf_file_2020@data %>%
  left_join(Total_2020_by_Code, c("ZIPCODE" = "zipcode"))

Total_spdf_file_2021 <- spdf_file
Total_spdf_file_2021@data <- Total_spdf_file_2021@data %>%
  left_join(Total_2021_by_Code, c("ZIPCODE" = "zipcode"))

total_violation <- list(Total_spdf_file_2019, Total_spdf_file_2020, Total_spdf_file_2021, Total_spdf_file_2022)
names(total_violation) <- c("2019", "2020", "2021", "2022")

nc_pal <- colorNumeric(palette = "YlOrBr", domain = Total_spdf_file_2019@data$Total, na.color = 'transparent')

leaflet()%>%
  addProviderTiles("CartoDB")%>%
  # First layer of polygons
  addPolygons(
    data = Total_spdf_file_2022,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Critical Violation : ', Total),
    group = '2022',
    highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
  ) %>%
  # Second layer of polygons
  addPolygons(
    data = Total_spdf_file_2021,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Violation : ', Total),
    group = '2021',
    highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
  ) %>%
  addLayersControl(overlayGroups = c("2022", "2021")) %>%
  # Third layer of polygons
  addPolygons(
    data = Total_spdf_file_2020,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Violation : ', Total),
    group = '2020',
    highlight = highlightOptions(weight  = 3, color = "red", bringToFront = TRUE)
  ) %>%
  # Fourth layer of polygons
  addPolygons(
    data = Total_spdf_file_2019,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Violation : ', Total),
    group = '2019',
    highlight = highlightOptions(weight  = 3, color = "red", bringToFront =  T)
  ) %>%
  addLayersControl(overlayGroups = c('2022', '2021', '2020', '2019')) %>%
  addLegend(pal = nc_pal, values = Total_spdf_file_2022$Total, opacity = 0.9, title = "Count of Total Violation", position = "bottomleft")
Warning: Some values were outside the color scale and will be treated as NAWarning: Some values were outside the color scale and will be treated as NA
violations <- list(total_violation,critical_violations)
names(violations) <- c("Number of Total Violations","Number of Crital Violations")
# Restaurant grade info
gradeInfo <- data %>%
  filter(!is.na(score)) %>%
  group_by(dba) %>%
  filter(inspection_date == max(inspection_date)) %>%
  select(dba, phone, grade, longitude, latitude, cuisine_description, zipcode, boro) %>%
  distinct(dba,.keep_all = TRUE) %>%
  ungroup()

gradeInfo$latitude <- as.numeric(gradeInfo$latitude)
gradeInfo$longitude <- as.numeric(gradeInfo$longitude)

score visualization

df$score <- as.numeric(df$score)
score_total <- df %>%
  filter(!is.na(score))%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))

score_2019 <- df%>%
  filter(!is.na(score)&year==2019)%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))
score_2019$mean_score <- round(score_2019$mean_score,2)

score_2020 <- df%>%
  filter(!is.na(score)&year==2020)%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))
score_2020$mean_score <- round(score_2020$mean_score,2)

score_2021 <- df%>%
  filter(!is.na(score)&year==2021)%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))
score_2021$mean_score <- round(score_2021$mean_score,2)

score_2022 <- df%>%
  filter(!is.na(score)&year==2022)%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))
score_2022$mean_score <- round(score_2022$mean_score,2)


Score_spdf_file_2019 <- spdf_file
Score_spdf_file_2019@data <- Score_spdf_file_2019@data %>%
  left_join(score_2019, c("ZIPCODE" = "zipcode"))

Score_spdf_file_2020 <- spdf_file
Score_spdf_file_2020@data <- Score_spdf_file_2020@data %>%
  left_join(score_2020, c("ZIPCODE" = "zipcode"))

Score_spdf_file_2021 <- spdf_file
Score_spdf_file_2021@data <- Score_spdf_file_2021@data %>%
  left_join(score_2021, c("ZIPCODE" = "zipcode"))

Score_spdf_file_2022 <- spdf_file
Score_spdf_file_2022@data <- Score_spdf_file_2022@data %>%
  left_join(score_2022, c("ZIPCODE" = "zipcode"))

score_map <- list(Score_spdf_file_2019,Score_spdf_file_2020,Score_spdf_file_2021,Score_spdf_file_2022)
names(score_map) <- c("2019","2020","2021","2022")

Section III: Shiny app

nc_pal <- colorNumeric(palette ="YlOrBr", domain = Total_spdf_file_2022@data$Total, na.color = 'transparent')
borough_list <- append("Overall", unique(df$boro))
cuisine_list <- append("Overall", unique(df$cuisine_description))

ui <- navbarPage(
  theme = bs_theme(bootswatch = "litera"), 
  "Restaurant Inspectation",
  tabPanel("Introduction"),
  tabPanel("Filtered Plots",
    fluidRow(
      column(3,
        selectInput("borough", "Borough", borough_list[!borough_list %in% c("0")], selected = "Overall"),
        selectInput("cuisine", "Cuisine", cuisine_list, selected = "Overall")
      ),
      column(9, 
        plotOutput("plot_action"),
        plotOutput("plot_critical_level"),
        plotOutput("plot_top_5_violation")
      )
    )
  ),
  navbarMenu("Violations Visualization",
            tabPanel("Violations Map",
    fluidRow(
      column(3,
        selectInput("type", "Type of Violations",c("Number of Total Violations", "Number of Crital Violations")),
        selectInput("time", "Year", c("2019", "2020", "2021", "2022"))
      ),
      column(9,
        leafletOutput("map", height = 600)
      )
    )
  ),
    
  tabPanel("Comparison by Years",
    fluidRow(
      column(4,
        selectInput("type_comp", "Type of Violations", c("Number of Total Violations", "Number of Crital Violations"))
      ),
      column(4,
        selectInput("time1", "Year", c("2019", "2020", "2021", "2022"))
      ),
      column(4,
        selectInput("time2", "Year", c("2019", "2020", "2021", "2022"), selected = "2020")
      )
    ),
    
    fluidRow(
      column(6,
        leafletOutput("map_comp1", height = 600)
      ),
      column(6,
        leafletOutput("map_comp2", height = 600)
      )
    )
  )),
  tabPanel("Inspection Score Visualization",
           fluidRow(column(3,
                  selectInput("score_year","Year:", c("2019", "2020", "2021", "2022"))),
           column(9, leafletOutput("score_map", height=600)))),
 
  tabPanel("Reference")
)

server <- function(input, output,session){
  # Filtered plots
  output$plot_action <- renderPlot({
    if (input$borough == 'Overall' & input$cuisine == 'Overall') {
      df_action <- df
    } else if (input$borough != 'Overall' & input$cuisine == 'Overall') {
      df_action <- df %>%
        filter(boro == input$borough)
    } else if (input$borough == 'Overall' & input$cuisine != 'Overall') {
      df_action <- df %>%
        filter(cuisine_description == input$cuisine)
    } else {
      df_action <- df %>%
        filter(boro == input$borough,
               cuisine_description == input$cuisine)
    }
    
    df_action %>%
      group_by(year, action) %>%
      summarize(num_violations = n()) %>%
      ggplot(aes(x = year, y = num_violations, fill = action)) +
      geom_bar(stat = "identity") +
      labs(title = "Number of Actions over Time", x = "Year", y = "Count", fill = "Action") +
      scale_fill_manual(values = wes_palettes$Moonrise3[c(3, 4, 5, 1, 2)]) +
      theme_minimal() +
      theme(legend.position = "bottom") +
      guides(fill = guide_legend(nrow = 5, byrow = TRUE))
  })
  
  output$plot_critical_level <- renderPlot({
    if (input$borough == 'Overall' & input$cuisine == 'Overall') {
      df_critical <- df
    } else if (input$borough != 'Overall' & input$cuisine == 'Overall') {
      df_critical <- df %>%
        filter(boro == input$borough)
    } else if (input$borough == 'Overall' & input$cuisine != 'Overall') {
      df_critical <- df %>%
        filter(cuisine_description == input$cuisine)
    } else {
      df_critical <- df %>%
        filter(boro == input$borough,
               cuisine_description == input$cuisine)
    }
    
    df_critical %>%
      filter(critical_flag != "Not Applicable") %>%
      group_by(year, critical_flag) %>%
      summarize(num_violations = n()) %>%
      ggplot(aes(x = year, y = num_violations, fill = critical_flag)) +
      geom_bar(stat = "identity") +
      labs(title = "Number of Violations by Critical Level over Time", x = "Year", y = "Count", fill = "Critical Level") +
      theme_minimal() +
      theme(legend.position = "bottom")
  })
  
  output$plot_top_5_violation <- renderPlot({
    if (input$borough == 'Overall' & input$cuisine == 'Overall') {
      df_top_5 <- df
    } else if (input$borough != 'Overall' & input$cuisine == 'Overall') {
      df_top_5 <- df %>%
        filter(boro == input$borough)
    } else if (input$borough == 'Overall' & input$cuisine != 'Overall') {
      df_top_5 <- df %>%
        filter(cuisine_description == input$cuisine)
    } else {
      df_top_5 <- df %>%
        filter(boro == input$borough,
               cuisine_description == input$cuisine)
    }
    
    df_top_5 %>%
      group_by(year, violation_code) %>%
      summarize(num_violations = n()) %>%
      ungroup() %>%
      group_by(year) %>%
      slice_max(num_violations, n = 5) %>%
      ggplot(aes(x = year, y = num_violations, fill = violation_code)) +
      geom_bar(stat = "identity") +
      labs(title = "Top 5 Violations over Time", x = "Year", y = "Count", fill = "Violation Code") +
      scale_fill_brewer(palette = "Set2") +
      theme_minimal() +
      theme(legend.position = "bottom")
  })
    
  # Interactive map
  output$map <- renderLeaflet({
    leaflet() %>%
    addProviderTiles("CartoDB") %>%
    addPolygons(
      data = violations[[input$type]][[input$time]],
      weight = 0.5,
      color = "black",
      stroke = TRUE,
      fillOpacity = 1,
      fillColor = ~nc_pal(Total),
      label = ~paste0 ('Total Violations : ', Total),
      group = '2022',
      highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
    ) %>%
    addLegend(pal = nc_pal, values = violations[[input$type]][[input$time]]$Total, opacity = 0.9, title = "Count of Total Violation", position = "bottomleft" )
  })
  
  # Interactive map compared by year  
  output$map_comp1 <- renderLeaflet({
    leaflet() %>%
    addProviderTiles("CartoDB") %>%
    addPolygons(
      data = violations[[input$type_comp]][[input$time1]],
      weight = 0.5,
      color = "black",
      stroke = TRUE,
      opacity = 1,
      fillOpacity = 1,
      fillColor = ~nc_pal(Total),
      label = ~paste0 ('Total Violations : ', Total),
      group = '2022',
      highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
    ) %>%
    addLegend(pal = nc_pal, values= violations[[input$type]][[input$time1]]$Total, opacity=0.9, title = "Count of Total Violation", position = "bottomleft" )
    })
  
  output$map_comp2 <- renderLeaflet({
    leaflet() %>%
    addProviderTiles("CartoDB") %>%
    addPolygons(
      data = violations[[input$type_comp]][[input$time2]],
      weight = 0.5,
      color = "black",
      stroke = TRUE,
      opacity = 1,
      fillOpacity = 1,
      fillColor = ~nc_pal(Total),
      label = ~paste0 ('Total Violations : ', Total),
      group = '2022',
      highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
    ) %>%
    addLegend(pal = nc_pal, values = violations[[input$type]][[input$time2]]$Total, opacity = 0.9, title = "Count of Total Violation", position = "bottomleft" )
  })
  
  # Interactive score map
  output$score_map <- renderLeaflet({
    nc_pal <- colorNumeric(palette ="Greens", domain = score_map[[3]]@data$mean_score, na.color = 'transparent')
    leaflet() %>%
    addProviderTiles("CartoDB") %>%
    addPolygons(
      data = score_map[[input$score_year]],
      weight = 0.5,
      color = "black",
      stroke = TRUE,
      fillOpacity = 1,
      fillColor = ~nc_pal(mean_score),
      label = ~paste0 ('Mean Inspection Score: ', mean_score),
      group = '2022',
      highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
    ) %>%
    addLegend(pal = nc_pal, values = score_map[[input$score_year]]$mean_score, opacity = 0.9, title = "Mean Score", position = "bottomleft" )
  })
}

shinyApp(ui,server)

Listening on http://127.0.0.1:5166
---
title: "R Notebook"
output: html_notebook
runtime: shiny
---

```{r}
library(broom)
library(bslib)
library(dplyr)
library(geojsonio)
library(ggnewscale)
library(ggplot2)
library(leaflet)
library(osmdata)
library(RColorBrewer)
library(RSocrata)
library(rstudioapi)
library(sf)
library(shiny)
library(tidyverse)
library(wesanderson)

# Set working directory
current_path <- rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
getwd()
```

```{r}
# Load the restaurant dataset
data <- read.socrata(
  "https://data.cityofnewyork.us/resource/43nn-pn8j.json",
  app_token = "zTRehp1897SQtpYtBiIOUMfR4"
)

# Extract years
data$year <- format(data$inspection_date,"%Y")

# Filter the dataset
df <- data %>%
  filter(data$year >= 2019 & zipcode != "" & dba != "") %>%
  mutate(grade = replace(grade, grade == "", NA))

head(df)
```

```{r}
# Read the geojson file containing spatial info
spdf_file <- geojson_read("../data/zip_code_040114.geojson", what = "sp")

stats_df <- spdf_file@data

# Convert it to a spatial data frame, with zip code as index
spdf_data <- tidy(spdf_file,
  region = "ZIPCODE"  # Use ZIPCODE variable as index, the index will be named "id"
)
```

# Section I: Filtered plots

```{r}
# Number of actions over time
df %>%
  group_by(year, action) %>%
  summarize(num_violations = n()) %>%
  ggplot(aes(x = year, y = num_violations, fill = action)) +
  geom_bar(stat = "identity") +
  labs(title = "Number of Actions over Time", x = "Year", y = "Count", fill = "Action") +
  scale_fill_manual(values = wes_palettes$Moonrise3[c(3, 4, 5, 1, 2)]) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 5, byrow = TRUE))
  
# Number of violations by critical level over time
df %>%
  filter(critical_flag != "Not Applicable") %>%
  group_by(year, critical_flag) %>%
  summarize(num_violations = n()) %>%
  ggplot(aes(x = year, y = num_violations, fill = critical_flag)) +
  geom_bar(stat = "identity") +
  labs(title = "Number of Violations by Critical Level over Time", x = "Year", y = "Count", fill = "Critical Level") +
  theme_minimal() +
  theme(legend.position = "bottom")

# Top 5 violations
df %>%
  group_by(year, violation_code) %>%
  summarize(num_violations = n()) %>%
  ungroup() %>%
  group_by(year) %>%
  slice_max(num_violations, n = 5) %>%
  ggplot(aes(x = year, y = num_violations, fill = violation_code)) +
  geom_bar(stat = "identity") +
  labs(title = "Top 5 Violations over Time", x = "Year", y = "Count", fill = "Violation Code") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(legend.position = "bottom")
```

# Section II: Interactive plots

```{r}
# Non-interactive map (population by region)
ggplot() +
  geom_polygon(data = spdf_data %>% left_join(stats_df, c("id" = "ZIPCODE")),
               aes(x = long, y = lat, group = group, fill = POPULATION),
               color = "white", size = .2) +
  theme_void() +
  coord_map() +
  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
  labs(title = "Population in New York City",
       subtitle = "Neighborhoods are filled by population",
       fill = "Population")
```

```{r}
# Number of restaurants per zipcode
Num_Rest_Code <- df %>%
  group_by(zipcode, dba, latitude, longitude) %>%
  count() %>%
  group_by(zipcode) %>%
  count()

Critical_2019_by_Code <- df %>%
  filter(year == 2019) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Critical_2020_by_Code <- df %>%
  filter(year == 2020) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Critical_2021_by_Code <- df %>%
  filter(year == 2021) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Critical_2022_by_Code <- df %>%
  filter(year == 2022) %>%
  group_by(zipcode) %>%
  summarize(Total = n())

Critical_spdf_file_2022 <- spdf_file
Critical_spdf_file_2022@data <- Critical_spdf_file_2022@data %>%
  left_join(Critical_2022_by_Code, c("ZIPCODE" = "zipcode"))

Critical_spdf_file_2019 <- spdf_file
Critical_spdf_file_2019@data <- Critical_spdf_file_2019@data %>%
  left_join(Critical_2019_by_Code, c("ZIPCODE" = "zipcode"))

Critical_spdf_file_2020 <- spdf_file
Critical_spdf_file_2020@data <- Critical_spdf_file_2020@data %>%
  left_join(Critical_2020_by_Code, c("ZIPCODE" = "zipcode"))

Critical_spdf_file_2021 <- spdf_file
Critical_spdf_file_2021@data <- Critical_spdf_file_2021@data %>%
  left_join(Critical_2021_by_Code, c("ZIPCODE" = "zipcode"))

critical_violations <- list(Critical_spdf_file_2019, Critical_spdf_file_2020, Critical_spdf_file_2021, Critical_spdf_file_2022)
names(critical_violations) <- c("2019", "2020", "2021", "2022")

nc_pal <- colorNumeric(palette = "YlOrBr", domain = Critical_spdf_file_2019@data$Total, na.color = 'transparent')

leaflet() %>%
  addProviderTiles("CartoDB") %>%
  # First layer of polygons
  addPolygons(
    data = Critical_spdf_file_2022,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0('Total Critical Violation : ', Total),
    group = '2022',
    highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
  ) %>%
  # Second layer of polygons
  addPolygons(
    data = Critical_spdf_file_2021,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Critical Violation : ', Total),
    group = '2021',
    highlight = highlightOptions(weight = 3, color = "red", bringToFront =  TRUE)
  ) %>%
  addLayersControl(overlayGroups = c("2022", "2021")) %>%
  addLegend(pal = nc_pal, values = Critical_spdf_file_2022$Total, opacity = 0.9, title = "Critical", position = "bottomleft")
```

```{r}
Total_violation <- df %>%
  group_by(zipcode) %>%
  summarize(Total = n())

Total_2019_by_Code <- df %>%
  filter(year == 2019) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Total_2020_by_Code <- df %>%
  filter(year == 2020) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Total_2021_by_Code <- df %>%
  filter(year == 2021) %>%
  group_by(zipcode) %>%
  summarize(Total = n())
  
Total_2022_by_Code <- df %>%
  filter(year == 2022) %>%
  group_by(zipcode) %>%
  summarize(Total = n())

Total_spdf_file_2022 <- spdf_file
Total_spdf_file_2022@data <- Total_spdf_file_2022@data %>%
  left_join(Total_2022_by_Code, c("ZIPCODE" = "zipcode"))

Total_spdf_file_2019 <- spdf_file
Total_spdf_file_2019@data <- Total_spdf_file_2019@data %>%
  left_join(Total_2019_by_Code, c("ZIPCODE" = "zipcode"))

Total_spdf_file_2020 <- spdf_file
Total_spdf_file_2020@data <- Total_spdf_file_2020@data %>%
  left_join(Total_2020_by_Code, c("ZIPCODE" = "zipcode"))

Total_spdf_file_2021 <- spdf_file
Total_spdf_file_2021@data <- Total_spdf_file_2021@data %>%
  left_join(Total_2021_by_Code, c("ZIPCODE" = "zipcode"))

total_violation <- list(Total_spdf_file_2019, Total_spdf_file_2020, Total_spdf_file_2021, Total_spdf_file_2022)
names(total_violation) <- c("2019", "2020", "2021", "2022")

nc_pal <- colorNumeric(palette = "YlOrBr", domain = Total_spdf_file_2019@data$Total, na.color = 'transparent')

leaflet()%>%
  addProviderTiles("CartoDB")%>%
  # First layer of polygons
  addPolygons(
    data = Total_spdf_file_2022,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Critical Violation : ', Total),
    group = '2022',
    highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
  ) %>%
  # Second layer of polygons
  addPolygons(
    data = Total_spdf_file_2021,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Violation : ', Total),
    group = '2021',
    highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
  ) %>%
  addLayersControl(overlayGroups = c("2022", "2021")) %>%
  # Third layer of polygons
  addPolygons(
    data = Total_spdf_file_2020,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Violation : ', Total),
    group = '2020',
    highlight = highlightOptions(weight  = 3, color = "red", bringToFront = TRUE)
  ) %>%
  # Fourth layer of polygons
  addPolygons(
    data = Total_spdf_file_2019,
    weight = 0.5,
    color = "black",
    stroke = TRUE,
    opacity = 1,
    fillColor = ~nc_pal(Total),
    label = ~paste0 ('Total Violation : ', Total),
    group = '2019',
    highlight = highlightOptions(weight  = 3, color = "red", bringToFront =  T)
  ) %>%
  addLayersControl(overlayGroups = c('2022', '2021', '2020', '2019')) %>%
  addLegend(pal = nc_pal, values = Total_spdf_file_2022$Total, opacity = 0.9, title = "Count of Total Violation", position = "bottomleft")

violations <- list(total_violation,critical_violations)
names(violations) <- c("Number of Total Violations","Number of Crital Violations")
```

```{r}
# Restaurant grade info
gradeInfo <- data %>%
  filter(!is.na(score)) %>%
  group_by(dba) %>%
  filter(inspection_date == max(inspection_date)) %>%
  select(dba, phone, grade, longitude, latitude, cuisine_description, zipcode, boro) %>%
  distinct(dba,.keep_all = TRUE) %>%
  ungroup()

gradeInfo$latitude <- as.numeric(gradeInfo$latitude)
gradeInfo$longitude <- as.numeric(gradeInfo$longitude)
```


#### score visualization
```{r}
df$score <- as.numeric(df$score)
score_total <- df %>%
  filter(!is.na(score))%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))

score_2019 <- df%>%
  filter(!is.na(score)&year==2019)%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))
score_2019$mean_score <- round(score_2019$mean_score,2)

score_2020 <- df%>%
  filter(!is.na(score)&year==2020)%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))
score_2020$mean_score <- round(score_2020$mean_score,2)

score_2021 <- df%>%
  filter(!is.na(score)&year==2021)%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))
score_2021$mean_score <- round(score_2021$mean_score,2)

score_2022 <- df%>%
  filter(!is.na(score)&year==2022)%>%
  group_by(zipcode)%>%
  summarise(mean_score = mean(score))
score_2022$mean_score <- round(score_2022$mean_score,2)


Score_spdf_file_2019 <- spdf_file
Score_spdf_file_2019@data <- Score_spdf_file_2019@data %>%
  left_join(score_2019, c("ZIPCODE" = "zipcode"))

Score_spdf_file_2020 <- spdf_file
Score_spdf_file_2020@data <- Score_spdf_file_2020@data %>%
  left_join(score_2020, c("ZIPCODE" = "zipcode"))

Score_spdf_file_2021 <- spdf_file
Score_spdf_file_2021@data <- Score_spdf_file_2021@data %>%
  left_join(score_2021, c("ZIPCODE" = "zipcode"))

Score_spdf_file_2022 <- spdf_file
Score_spdf_file_2022@data <- Score_spdf_file_2022@data %>%
  left_join(score_2022, c("ZIPCODE" = "zipcode"))

score_map <- list(Score_spdf_file_2019,Score_spdf_file_2020,Score_spdf_file_2021,Score_spdf_file_2022)
names(score_map) <- c("2019","2020","2021","2022")
```


# Section III: Shiny app

```{r}
nc_pal <- colorNumeric(palette ="YlOrBr", domain = Total_spdf_file_2022@data$Total, na.color = 'transparent')
borough_list <- append("Overall", unique(df$boro))
cuisine_list <- append("Overall", unique(df$cuisine_description))

ui <- navbarPage(
  theme = bs_theme(bootswatch = "litera"), 
  "Restaurant Inspectation",
  tabPanel("Introduction"),
  tabPanel("Filtered Plots",
    fluidRow(
      column(3,
        selectInput("borough", "Borough", borough_list[!borough_list %in% c("0")], selected = "Overall"),
        selectInput("cuisine", "Cuisine", cuisine_list, selected = "Overall")
      ),
      column(9, 
        plotOutput("plot_action"),
        plotOutput("plot_critical_level"),
        plotOutput("plot_top_5_violation")
      )
    )
  ),
  navbarMenu("Violations Visualization",
            tabPanel("Violations Map",
    fluidRow(
      column(3,
        selectInput("type", "Type of Violations",c("Number of Total Violations", "Number of Crital Violations")),
        selectInput("time", "Year", c("2019", "2020", "2021", "2022"))
      ),
      column(9,
        leafletOutput("map", height = 600)
      )
    )
  ),
    
  tabPanel("Comparison by Years",
    fluidRow(
      column(4,
        selectInput("type_comp", "Type of Violations", c("Number of Total Violations", "Number of Crital Violations"))
      ),
      column(4,
        selectInput("time1", "Year", c("2019", "2020", "2021", "2022"))
      ),
      column(4,
        selectInput("time2", "Year", c("2019", "2020", "2021", "2022"), selected = "2020")
      )
    ),
    
    fluidRow(
      column(6,
        leafletOutput("map_comp1", height = 600)
      ),
      column(6,
        leafletOutput("map_comp2", height = 600)
      )
    )
  )),
  tabPanel("Inspection Score Visualization",
           fluidRow(column(3,
                  selectInput("score_year","Year:", c("2019", "2020", "2021", "2022"))),
           column(9, leafletOutput("score_map", height=600)))),
 
  tabPanel("Reference")
)

server <- function(input, output,session){
  # Filtered plots
  output$plot_action <- renderPlot({
    if (input$borough == 'Overall' & input$cuisine == 'Overall') {
      df_action <- df
    } else if (input$borough != 'Overall' & input$cuisine == 'Overall') {
      df_action <- df %>%
        filter(boro == input$borough)
    } else if (input$borough == 'Overall' & input$cuisine != 'Overall') {
      df_action <- df %>%
        filter(cuisine_description == input$cuisine)
    } else {
      df_action <- df %>%
        filter(boro == input$borough,
               cuisine_description == input$cuisine)
    }
    
    df_action %>%
      group_by(year, action) %>%
      summarize(num_violations = n()) %>%
      ggplot(aes(x = year, y = num_violations, fill = action)) +
      geom_bar(stat = "identity") +
      labs(title = "Number of Actions over Time", x = "Year", y = "Count", fill = "Action") +
      scale_fill_manual(values = wes_palettes$Moonrise3[c(3, 4, 5, 1, 2)]) +
      theme_minimal() +
      theme(legend.position = "bottom") +
      guides(fill = guide_legend(nrow = 5, byrow = TRUE))
  })
  
  output$plot_critical_level <- renderPlot({
    if (input$borough == 'Overall' & input$cuisine == 'Overall') {
      df_critical <- df
    } else if (input$borough != 'Overall' & input$cuisine == 'Overall') {
      df_critical <- df %>%
        filter(boro == input$borough)
    } else if (input$borough == 'Overall' & input$cuisine != 'Overall') {
      df_critical <- df %>%
        filter(cuisine_description == input$cuisine)
    } else {
      df_critical <- df %>%
        filter(boro == input$borough,
               cuisine_description == input$cuisine)
    }
    
    df_critical %>%
      filter(critical_flag != "Not Applicable") %>%
      group_by(year, critical_flag) %>%
      summarize(num_violations = n()) %>%
      ggplot(aes(x = year, y = num_violations, fill = critical_flag)) +
      geom_bar(stat = "identity") +
      labs(title = "Number of Violations by Critical Level over Time", x = "Year", y = "Count", fill = "Critical Level") +
      theme_minimal() +
      theme(legend.position = "bottom")
  })
  
  output$plot_top_5_violation <- renderPlot({
    if (input$borough == 'Overall' & input$cuisine == 'Overall') {
      df_top_5 <- df
    } else if (input$borough != 'Overall' & input$cuisine == 'Overall') {
      df_top_5 <- df %>%
        filter(boro == input$borough)
    } else if (input$borough == 'Overall' & input$cuisine != 'Overall') {
      df_top_5 <- df %>%
        filter(cuisine_description == input$cuisine)
    } else {
      df_top_5 <- df %>%
        filter(boro == input$borough,
               cuisine_description == input$cuisine)
    }
    
    df_top_5 %>%
      group_by(year, violation_code) %>%
      summarize(num_violations = n()) %>%
      ungroup() %>%
      group_by(year) %>%
      slice_max(num_violations, n = 5) %>%
      ggplot(aes(x = year, y = num_violations, fill = violation_code)) +
      geom_bar(stat = "identity") +
      labs(title = "Top 5 Violations over Time", x = "Year", y = "Count", fill = "Violation Code") +
      scale_fill_brewer(palette = "Set2") +
      theme_minimal() +
      theme(legend.position = "bottom")
  })
    
  # Interactive map
  output$map <- renderLeaflet({
    leaflet() %>%
    addProviderTiles("CartoDB") %>%
    addPolygons(
      data = violations[[input$type]][[input$time]],
      weight = 0.5,
      color = "black",
      stroke = TRUE,
      fillOpacity = 1,
      fillColor = ~nc_pal(Total),
      label = ~paste0 ('Total Violations : ', Total),
      group = '2022',
      highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
    ) %>%
    addLegend(pal = nc_pal, values = violations[[input$type]][[input$time]]$Total, opacity = 0.9, title = "Count of Total Violation", position = "bottomleft" )
  })
  
  # Interactive map compared by year  
  output$map_comp1 <- renderLeaflet({
    leaflet() %>%
    addProviderTiles("CartoDB") %>%
    addPolygons(
      data = violations[[input$type_comp]][[input$time1]],
      weight = 0.5,
      color = "black",
      stroke = TRUE,
      opacity = 1,
      fillOpacity = 1,
      fillColor = ~nc_pal(Total),
      label = ~paste0 ('Total Violations : ', Total),
      group = '2022',
      highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
    ) %>%
    addLegend(pal = nc_pal, values= violations[[input$type]][[input$time1]]$Total, opacity=0.9, title = "Count of Total Violation", position = "bottomleft" )
    })
  
  output$map_comp2 <- renderLeaflet({
    leaflet() %>%
    addProviderTiles("CartoDB") %>%
    addPolygons(
      data = violations[[input$type_comp]][[input$time2]],
      weight = 0.5,
      color = "black",
      stroke = TRUE,
      opacity = 1,
      fillOpacity = 1,
      fillColor = ~nc_pal(Total),
      label = ~paste0 ('Total Violations : ', Total),
      group = '2022',
      highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
    ) %>%
    addLegend(pal = nc_pal, values = violations[[input$type]][[input$time2]]$Total, opacity = 0.9, title = "Count of Total Violation", position = "bottomleft" )
  })
  
  # Interactive score map
  output$score_map <- renderLeaflet({
    nc_pal <- colorNumeric(palette ="Greens", domain = score_map[[3]]@data$mean_score, na.color = 'transparent')
    leaflet() %>%
    addProviderTiles("CartoDB") %>%
    addPolygons(
      data = score_map[[input$score_year]],
      weight = 0.5,
      color = "black",
      stroke = TRUE,
      fillOpacity = 1,
      fillColor = ~nc_pal(mean_score),
      label = ~paste0 ('Mean Inspection Score: ', mean_score),
      group = '2022',
      highlight = highlightOptions(weight = 3, color = "red", bringToFront = TRUE)
    ) %>%
    addLegend(pal = nc_pal, values = score_map[[input$score_year]]$mean_score, opacity = 0.9, title = "Mean Score", position = "bottomleft" )
  })
}

shinyApp(ui,server)
```